******************************************************************
*** MERGE AUDITS DATA WITH INPATIENT CLAIMS   	               ***
*** Create master data of inpatient claims from 2007-2016,     ***
*** merged with audit rates. 								   ***
*** Create summary statistics by provider and year:			   ***
***  * discharges per provider-year, by stay chars  		   ***
***  * audit rates per provider-year, using various defs 	   ***
*** Last edited: 1/26/2023                       			   ***
******************************************************************

capture log close
log using "${CodePath}/log/mergeaudits_$S_DATE.log", text replace

// Switches
local mergeaudit_ip	     = 1 // merge audit data with inpatinet claims iteratively
local convert_fyear      = 1 // convert from calendar year to fiscal year (fiscal year = October - September)
local sum_hosp_level     = 1 // sum at the hospital-level
local sum_within_hosp    = 1 // sum at the hospital-characteristic-level (i.e., LOS, DRG)

*--------------------------------------------------
* Merge audit data with claims
*--------------------------------------------------

if `mergeaudit_ip' == 1{
	// Create year-specific audit data and rename variables to be merged with claims data
 	use "${DataPath}/Audit Data/byclaimtype_1_withCCN.dta", clear // available in replication packet
 	gen year_svcend = year(Date_SvcEnd)
	qui sum year_svcend
	local minyear = r(min)
	local maxyear = r(max)
	
	gen byte nonnumericprovider = real(ProviderLegacyNumber)==. 
	gen ProviderLegacyNumber_numeric = ProviderLegacyNumber
	replace ProviderLegacyNumber_numeric = "" if nonnumericprovider
	destring ProviderLegacyNumber_numeric, replace
	gen str6 ProviderLegacyNumber_clean = string(ProviderLegacyNumber_numeric,"%06.0f")
	replace ProviderLegacyNumber = ProviderLegacyNumber_clean if !missing(ProviderLegacyNumber_clean)  & ProviderLegacyNumber_clean != "."
	replace ProviderLegacyNumber = "" if ProviderLegacyNumber == "000000"
	replace ProviderLegacyNumber = trim(ProviderLegacyNumber)

	keep if ProviderLegacyNumber != ""


	duplicates drop InternalClaimID, force

	gen nodx1 = cond(OrigDx1Code == "" | real(OrigDx1Code) == 0, 1, 0)
	drop if nodx1 == 1

	keep ProviderLegacyNumber InternalClaimID Orig_ClaimAmt ReviewType ReviewStatus Date_SvcEnd Date_SvcStart Date_SvcEnd DemandAmt OverUnder OrigDRG FinalDRG OrigDx1Code FinalDx1Code OrigProcCode FinalProcCode Date_InitSelect Date_DemandSent Date_MedRequest Date_Closed Date_NoFindings Date_Discuss* year_svcend
 
	gen audited = 1
	
	 forval y = `minyear'/`maxyear'{
		preserve
			keep if year_svcend == `y'
			rename ProviderLegacyNumber provider
			rename Date_SvcEnd thru_dt
			rename Date_SvcStart from_dt
			save "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", replace
		restore
	}  


	// CREATE CROSSWALK BETWEEN AUDITS AND CLAIMS
	forval y = `minyear'/`maxyear'{
		di "Now processing `y'..."
		use "${McareDataPath}/harm/100pct/ip/`y'/ipc`y'.dta", clear
		keep clm_id provider from_dt thru_dt icd_dgns_cd1 drg_cd icd_prcdr_cd1 pmt_amt
		
		// 1. Identify claims that never show up in the audit data using provider + dates + diagnosis
			// Get list of all provider + dates + diagnosis combos that show up in audit data. If claim is NOT in this list, assign audit = 0
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				*keep InternalClaimID prncpal_dgns_cd from_dt thru_dt provider
				duplicates drop provider icd_dgns_cd1 from_dt thru_dt, force
				tempfile audits_diag
				save `audits_diag'
			restore

			merge m:1 provider icd_dgns_cd1 from_dt thru_dt using `audits_diag'

			// Claim never shows up in audit data --> assign audit == 0
			preserve
				keep if _merge == 1
				gen audit = 0
				drop _merge
				save "${DataPath}/claims/ip100pct_`y'_noaudit.dta", replace
			restore

			// Claim shows up in audit data at least once --> potentially audited
			preserve
				keep if _merge == 3
				drop _merge
				keep clm_id provider from_dt thru_dt icd_dgns_cd1 drg_cd icd_prcdr_cd1 pmt_amt 
				save "${DataPath}/claims/ip100pct_`y'_potaudit.dta", replace
			restore 

		// 2. Among those potentially audited, try matching on different characteristics
			// Identify claims that are uniquely identified in claims data or audits data, do a 1:1 merge, then merge back 
			use "${DataPath}/claims/ip100pct_`y'_potaudit.dta", clear
			
			// 2.1: uniquely ID'd by provider + dates + diagnosis
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt, gen(dup1)
				drop if dup1 > 0
				drop dup1
				tempfile audits_unique1
				save `audits_unique1'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt, gen(dup1)
			preserve
				drop if dup1 > 0
				drop dup1
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt using `audits_unique1'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID1 
				keep InternalClaimID1 clm_id
				tempfile claimaudits_unique1
				save `claimaudits_unique1'
			restore

			merge 1:1 clm_id using `claimaudits_unique1'
			drop _merge
			
			// 2.2: uniquely ID'd by provider + dates + diagnosis + original drg
			destring drg_cd, replace
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen drg_cd = OrigDRG
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt drg_cd, gen(dup2)
				drop if dup2 > 0
				drop dup2
				tempfile audits_unique2
				save `audits_unique2'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt drg_cd, gen(dup2)
			preserve
				drop if dup2 > 0
				drop dup2
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt drg_cd using `audits_unique2'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID2 
				keep InternalClaimID2 clm_id
				tempfile claimaudits_unique2
				save `claimaudits_unique2'
			restore

			merge 1:1 clm_id using `claimaudits_unique2'
			gen newmatch = cond(missing(InternalClaimID1) & !missing(InternalClaimID2), 1, 0)
			tab newmatch
			drop newmatch
			gen conflict1_2 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID2) & InternalClaimID1!=InternalClaimID2, 1, 0)
			tab conflict1_2
			drop conflict1_2
			drop _merge

			// 2.3: uniquely ID'd by provider + dates + diagnosis + procedure
			destring icd_prcdr_cd1, replace force

			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				rename OrigProcCode icd_prcdr_cd1
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt icd_prcdr_cd1, gen(dup3)
				drop if dup3 > 0
				drop dup3
				tempfile audits_unique3
				save `audits_unique3'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt icd_prcdr_cd1, gen(dup3)
			preserve
				drop if dup3 > 0
				drop dup3
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt icd_prcdr_cd1  using `audits_unique3'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID3 
				keep InternalClaimID3 clm_id
				tempfile claimaudits_unique3
				save `claimaudits_unique3'
			restore

			merge 1:1 clm_id using `claimaudits_unique3'

			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & !missing(InternalClaimID3), 1, 0)
			tab newmatch
			drop newmatch
			gen conflict1_3 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID3) & InternalClaimID1!=InternalClaimID3, 1, 0)
			tab conflict1_3
			drop conflict1_3
			
			drop _merge

			// 2.4: uniquely ID'd by provider + dates + diagnosis + final drg
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen drg_cd = FinalDRG
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt drg_cd, gen(dup4)
				drop if dup4 > 0
				drop dup4
				tempfile audits_unique4
				save `audits_unique4'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt drg_cd, gen(dup4)
			preserve
				drop if dup4 > 0
				drop dup4
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt drg_cd using `audits_unique4'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID4 
				keep InternalClaimID4 clm_id
				tempfile claimaudits_unique4
				save `claimaudits_unique4'
			restore

			merge 1:1 clm_id using `claimaudits_unique4'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & !missing(InternalClaimID4), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_4 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID4) & InternalClaimID1!=InternalClaimID4, 1, 0)
			tab conflict1_4
			drop conflict1_4
			drop _merge

			// 2.5: uniquely ID'd by provider + dates  + original drg
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen drg_cd = OrigDRG
				duplicates tag provider from_dt thru_dt drg_cd, gen(dup5)
				drop if dup5 > 0
				drop dup5
				tempfile audits_unique5
				save `audits_unique5'
			restore

			duplicates tag provider from_dt thru_dt drg_cd, gen(dup5)
			preserve
				drop if dup5 > 0
				drop dup5
				merge 1:1 provider from_dt thru_dt drg_cd using `audits_unique5'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID5 
				keep InternalClaimID5 clm_id
				tempfile claimaudits_unique5
				save `claimaudits_unique5'
			restore

			merge 1:1 clm_id using `claimaudits_unique5'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & !missing(InternalClaimID5), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_5 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID5) & InternalClaimID1!=InternalClaimID5, 1, 0)
			tab conflict1_5
			drop conflict1_5
			drop _merge

			// 2.6: uniquely ID'd by provider + dates  + final drg
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen drg_cd = FinalDRG
				keep if drg_cd != 0
				duplicates tag provider from_dt thru_dt drg_cd, gen(dup6)
				drop if dup6 > 0
				drop dup6
				tempfile audits_unique6
				save `audits_unique6'
			restore

			duplicates tag provider from_dt thru_dt drg_cd, gen(dup6)
			preserve
				drop if dup6 > 0
				drop dup6
				merge 1:1 provider from_dt thru_dt drg_cd using `audits_unique6'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID6 
				keep InternalClaimID6 clm_id
				tempfile claimaudits_unique6
				save `claimaudits_unique6'
			restore

			merge 1:1 clm_id using `claimaudits_unique6'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & missing(InternalClaimID5) & !missing(InternalClaimID6), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_6 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID6) & InternalClaimID1!=InternalClaimID6, 1, 0)
			tab conflict1_6
			drop conflict1_6
			drop _merge

			// 2.7: uniquely ID'd by provider + dates + diagnosis + paid amount (orig claim amount - amount demanded), rounded to closest dollar
			gen pmt_amt_round = round(pmt_amt, 1)
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen pmt_amt_round = round(Orig_ClaimAmt - DemandAmt, 1)
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round, gen(dup7)
				drop if dup7 > 0
				drop dup7
				tempfile audits_unique7
				save `audits_unique7'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round, gen(dup7)
			preserve
				drop if dup7 > 0
				drop dup7
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round using `audits_unique7'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID7 
				drop pmt_amt_round
				keep InternalClaimID7 clm_id
				tempfile claimaudits_unique7
				save `claimaudits_unique7'
			restore

			merge 1:1 clm_id using `claimaudits_unique7'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & missing(InternalClaimID5) & missing(InternalClaimID6) & !missing(InternalClaimID7), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_7 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID7) & InternalClaimID1!=InternalClaimID7, 1, 0)
			tab conflict1_7
			drop conflict1_7
			drop _merge

			// 2.8: uniquely ID'd by provider + dates + diagnosis + paid amount (orig claim amount - amount demanded), floor
			gen pmt_amt_floor = floor(pmt_amt)
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen pmt_amt_floor = floor(Orig_ClaimAmt - DemandAmt)
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor, gen(dup8)
				drop if dup8 > 0
				drop dup8
				
				tempfile audits_unique8
				save `audits_unique8'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor, gen(dup8)
			preserve
				drop if dup8 > 0
				drop dup8
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor using `audits_unique8'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID8 
				drop pmt_amt_floor
				keep InternalClaimID8 clm_id
				tempfile claimaudits_unique8
				save `claimaudits_unique8'
			restore

			merge 1:1 clm_id using `claimaudits_unique8'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & missing(InternalClaimID5) & missing(InternalClaimID6) & missing(InternalClaimID7) & !missing(InternalClaimID8), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_8 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID8) & InternalClaimID1!=InternalClaimID8, 1, 0)
			tab conflict1_8
			drop conflict1_8
			
			gen conflict7_8 = cond(!missing(InternalClaimID7) & !missing(InternalClaimID8) & InternalClaimID7!=InternalClaimID8, 1, 0)
			tab conflict7_8
			drop conflict7_8
			drop _merge

			// 2.9: uniquely ID'd by provider + dates + diagnosis + paid amount (orig claim amount), rounded
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen pmt_amt_round = round(Orig_ClaimAmt, 1)
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round, gen(dup9)
				drop if dup9 > 0
				drop dup9
				tempfile audits_unique9
				save `audits_unique9'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round, gen(dup9)
			preserve
				drop if dup9 > 0
				drop dup9
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round using `audits_unique9'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID9 
				drop pmt_amt_round
				keep InternalClaimID9 clm_id
				tempfile claimaudits_unique9
				save `claimaudits_unique9'
			restore

			merge 1:1 clm_id using `claimaudits_unique9'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & missing(InternalClaimID5) & missing(InternalClaimID6) & missing(InternalClaimID7) & missing(InternalClaimID8) & !missing(InternalClaimID9), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_9 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID9) & InternalClaimID1!=InternalClaimID9, 1, 0)
			tab conflict1_9
			drop conflict1_9

			gen conflict7_9 = cond(!missing(InternalClaimID7) & !missing(InternalClaimID9) & InternalClaimID7!=InternalClaimID9, 1, 0)
			tab conflict7_9
			drop conflict7_9

			drop _merge

			// 2.10: uniquely ID'd by provider + dats + diagnosis + paid amount (orig claim amount), floor
			preserve
				use "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", clear
				gen icd_dgns_cd1 = trim(OrigDx1Code)
				gen pmt_amt_floor = floor(Orig_ClaimAmt)
				duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor, gen(dup10)
				drop if dup10 > 0
				drop dup10
				tempfile audits_unique10
				save `audits_unique10'
			restore

			duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor, gen(dup10)
			preserve
				drop if dup10 > 0
				drop dup10
				merge 1:1 provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor using `audits_unique10'
				keep if _merge == 3
				drop _merge
				rename InternalClaimID InternalClaimID10 
				drop pmt_amt_floor
				keep InternalClaimID10 clm_id
				tempfile claimaudits_unique10
				save `claimaudits_unique10'
			restore

			merge 1:1 clm_id using `claimaudits_unique10'
			gen newmatch = cond(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & missing(InternalClaimID5) & missing(InternalClaimID6) & missing(InternalClaimID7) & missing(InternalClaimID8) & missing(InternalClaimID9) & !missing(InternalClaimID10), 1, 0)
			tab newmatch
			drop newmatch

			gen conflict1_10 = cond(!missing(InternalClaimID1) & !missing(InternalClaimID10) & InternalClaimID1!=InternalClaimID10, 1, 0)
			tab conflict1_10
			drop conflict1_10
			
			gen conflict9_10 = cond(!missing(InternalClaimID9) & !missing(InternalClaimID9) & InternalClaimID9!=InternalClaimID10, 1, 0)
			tab conflict9_10
			drop conflict9_10
			drop _merge

		// Summary stats
		gen assignedatleast1 = cond(!(missing(InternalClaimID1) & missing(InternalClaimID2) & missing(InternalClaimID3) & missing(InternalClaimID4) & missing(InternalClaimID5) & missing(InternalClaimID6) & missing(InternalClaimID7) & missing(InternalClaimID8) & missing(InternalClaimID9) & missing(InternalClaimID10)), 1, 0)
		tab assignedatleast1 

		// Get a list of claims that can never be uniquely identified by 2.1-2.8. If the claim can never be uniquely identified, delete it from the sample as we cannot know if it was audited or not
		/* duplicates tag provider icd_dgns_cd1 from_dt thru_dt, gen(dup1)
		duplicates tag provider icd_dgns_cd1 from_dt thru_dt drg_cd, gen(dup2)
		duplicates tag provider icd_dgns_cd1 from_dt thru_dt icd_prcdr_cd1, gen(dup3)
		duplicates tag provider icd_dgns_cd1 from_dt thru_dt drg_cd, gen(dup4)
		duplicates tag provider from_dt thru_dt drg_cd, gen(dup5)
		duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_round, gen(dup7)
		duplicates tag provider icd_dgns_cd1 from_dt thru_dt pmt_amt_floor, gen(dup8) */
		*egen totdups = rowtotal(dup1 dup2 dup3 dup4 dup5 dup6 dup7 dup8 dup9 dup10)
		gen neveruniqueID = cond(dup1 >0 & dup2 > 0 & dup3> 0 & dup4 > 0 & dup5>0 & dup6 > 0 & dup7>0 & dup8>0 & dup9 >0 & dup10 >0, 1, 0)
		tab neveruniqueID

		*drop if neveruniqueID == 1
		drop assignedatleast1 

		// Assign audit to each claim successively
		gen 	InternalClaimID = InternalClaimID1
		replace InternalClaimID = InternalClaimID2 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID3 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID4 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID5 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID6 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID7 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID8 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID9 if missing(InternalClaimID)
		replace InternalClaimID = InternalClaimID10 if missing(InternalClaimID)


		gen     audit = 0
		replace audit = 1 if !missing(InternalClaimID) 

		capture drop claimid_dup
		// If multiple claims point to the same audit, choose the one with the "earliest" match (i.e., choose the one w/ the InternalClaimID1 match over the one with the InternalClaimID10 match)
		duplicates tag InternalClaimID, gen(claimid_dup)
		preserve
			keep if !missing(InternalClaimID) & claimid_dup > 0 & neveruniqueID == 0
			local N = _N
			if `N' > 0{
				gen     firstnonmissing = 11
				forval i = 1/10{
					replace firstnonmissing = `i' if !missing(InternalClaimID`i') & `i' < firstnonmissing
				}
				bys InternalClaimID: egen firstnonmissing_grp = min(firstnonmissing)
				gen delete = 1 if firstnonmissing != firstnonmissing_grp
				keep clm_id delete
				tempfile deleteddupclaims
				save `deleteddupclaims'
			}
		restore
		if `N' > 0{
			merge 1:1 clm_id using `deleteddupclaims'
			keep if missing(delete)
			drop claimid_dup delete
		}
		capture drop claimid_dup

		save "${DataPath}/claims/ip100pct_`y'_audited.dta", replace 

		*use "${DataPath}/claims/ip100pct_`y'_audited.dta", clear
		gen unabletoid    = cond(missing(InternalClaimID), 1, 0) // Claim matches at least 1 audit using dxcode 1, but cannot be uniquely matched 1-1 
		tab neveruniqueID unabletoid							// Claim shows up in audit data at least once, but is never uniquely ID'd 
										  					     // neveruniqueID should be a subset of unable to ID -- a potentially audited claim can not match either because it a) never was actually audited or b) cannot be uniquely identified by dx1, drg, payment, etc.

		keep if unabletoid == 0 & neveruniqueID == 0 	         // since we're not sure about these claims, delete them 

		// Add audited and non-audited claims together to create crosswalk b/w claims and audit data
		keep clm_id InternalClaimID audit
		append using "${DataPath}/claims/ip100pct_`y'_noaudit.dta", keep(clm_id audit)
		keep clm_id InternalClaimID audit
		
		save "${DataPath}/claims/ip100pct_`y'_auditedxw.dta", replace // this xw the list of all known 1-1 matches between claims and audits. 

																	  // when merged with claims, claims that do not merge are potentially audited but cannot be UNIQUELY matched 1-1. therefore we delete them. call the claims deleted (del_claims)
																	  // when merged with audits, audits that do not merge either do not show up in the claims data, or cannot be UNIQUELY matched 1-1. therefore we should delete them. call the audits deleted (del_audits)
																	  // we expect that many of the audits del_audits overlap with claims in del_claims. however, they can't be matched because they can't be uniquely identified
	} 

	// MERGE XW WITH CLAIMS AND AUDITS 
	forval y = `minyear'/`maxyear'{
	*forval y = 2006/2016{
		use "${McareDataPath}/harm/100pct/ip/`y'/ipc`y'.dta", clear
		duplicates tag clm_id, gen(claimid_dup)
		drop if claimid_dup > 0
		drop claimid_dup
		merge 1:1 clm_id using "${DataPath}/claims/ip100pct_`y'_auditedxw.dta", generate(_merge_clm2xw)
		duplicates tag InternalClaimID, gen(internalclaimid_dup)
		assert internalclaimid_dup == 0 if !missing(InternalClaimID)
		merge m:1 InternalClaimID using "${DataPath}/Audit Data/byclaimtype_1_withCCN_`y'.dta", generate(_merge_xw2aud)
		tab audit _merge_xw2aud, missing
		drop dob_dt gndr_cd race_cd zip_cd
		merge m:1 bene_id using "${McareDataPath}/harm/100pct/bsfab/`y'/bsfab`y'.dta", keepusing(bene_dob sex race g_bene_zip5 death_dt hmoind*) keep(1 3) gen(_merge_bsfab)

		/* merge m:1 bene_id using "${DataPath}/chronicconditions/bsfcc`y'.dta", nogen keep(1 3)
		foreach var of varlist fl_*end fl_*mid n_cc_*end n_cc_*mid atleast*mid atleast*end{
			rename `var' `var'_last
		} */
		save "${DataPath}/claims/ip100pct_`y'_full.dta", replace

	} 

} // mergeaudit_ip


*--------------------------------------------------
* Construct fiscal year datasets for discharges from 2007-2016, separately and then combined
*--------------------------------------------------
if `convert_fyear' == 1{
	forval fyear = 2007/2016{
	*forval fyear = 2009/2010{
		if `fyear'!=2009 & `fyear'!=2010{
			local fyear1 = `fyear' - 1
			use "${DataPath}/claims/ip100pct_`fyear1'_full.dta", clear
			gen mon_disch = month(thru_dt)
			keep if mon_disch >= 10
			preserve
				use "${DataPath}/claims/ip100pct_`fyear'_full.dta", clear
				gen mon_disch = month(thru_dt)
				keep if mon_disch < 10
				tempfile fyear2
				save `fyear2'
			restore
			append using `fyear2'
			gen thru_dt_fyear = `fyear'
			gen drg_vers = thru_dt_fyear - 1983
			drop if missing(file_year)
			save "${DataPath}/claims/ip100pct_fy`fyear'_full.dta", replace
		}
		if `fyear' == 2009{
			use "${DataPath}/claims/ip100pct_2008_full.dta", clear
			gen mon_disch = month(thru_dt)
			keep if mon_disch >= 10
			preserve
				use "${DataPath}/claims/ip100pct_2009_full.dta", clear
				rename g_fileyear file_year
				gen mon_disch = month(thru_dt)
				keep if mon_disch < 10
				tempfile fyear2
				save `fyear2'
			restore
			append using `fyear2'
			gen thru_dt_fyear = `fyear'
			gen drg_vers = thru_dt_fyear - 1983
			drop if missing(file_year)
			save "${DataPath}/claims/ip100pct_fy`fyear'_full.dta", replace
		}

		if `fyear' == 2010{
			use "${DataPath}/claims/ip100pct_2009_full.dta", clear
			rename g_fileyear file_year
			gen mon_disch = month(thru_dt)
			keep if mon_disch >= 10
			preserve
				use "${DataPath}/claims/ip100pct_2010_full.dta", clear
				gen mon_disch = month(thru_dt)
				keep if mon_disch < 10
				tempfile fyear2
				save `fyear2'
			restore
			append using `fyear2'
			gen thru_dt_fyear = `fyear'
			gen drg_vers = thru_dt_fyear - 1983
			drop if missing(file_year)
			save "${DataPath}/claims/ip100pct_fy`fyear'_full.dta", replace
		} 
	}


	
	*--------------------------------------------------
	* Construct combined 2007-2016 dataset with audit information
	*--------------------------------------------------
	// NOTE: Edits made on 5/9/2021: changed min_ndisch from 500 to 0. re-ran so that it includes 2009 now. removed non-MA beneficiaries.
	*local min_ndisch = 500 
	local min_ndisch = 0 // edited in 5/9/2021
	use "${DataPath}/claims/ip100pct_fy2007_full.dta", clear
	di "now appending year: 2007"
	// Restrict to providers with sufficient # of discharges
	bys provider: gen tot_disch = _N
	keep if tot_disch >= `min_ndisch'

	// only include non-MA beneficiaries -- added in 5/9/2021
	keep if (mon_disch == 1 & hmoind01 == "0") | (mon_disch == 2 & hmoind02 == "0") | (mon_disch == 3 & hmoind03 == "0") | (mon_disch == 4 & hmoind04 == "0") | (mon_disch == 5 & hmoind05 == "0") | ///
			(mon_disch == 6 & hmoind06 == "0") | (mon_disch == 7 & hmoind07 == "0") | (mon_disch == 8 & hmoind08 == "0") | (mon_disch == 9 & hmoind09 == "0") | (mon_disch == 10 & hmoind10 == "0") | ///
			(mon_disch == 11 & hmoind11 == "0") | (mon_disch == 12 & hmoind12 == "0")

	// Construct variables
	gen fyear = 2007
	gen overpaid  = cond(OverUnder == "Overpayment", 1, 0)
	gen underpaid = cond(OverUnder == "Underpayment", 1, 0)
	gen OrigPmtAmt = pmt_amt
	replace OrigPmtAmt = Orig_ClaimAmt if !missing(Orig_ClaimAmt)

	replace DemandAmt = 0 if missing(DemandAmt)

	gen DemandAmt_over = DemandAmt if overpaid == 1
	replace DemandAmt_over = 0 if missing(DemandAmt_over)

	gen DemandAmt_under = DemandAmt if underpaid == 1
	replace DemandAmt_under = 0 if missing(DemandAmt_under)

	gen p_demandamt       = DemandAmt / OrigPmtAmt
	gen p_demandamt_over  = DemandAmt_over / OrigPmtAmt
	gen p_demandamt_under = DemandAmt_under / OrigPmtAmt

	gen year_audit = year(Date_InitSelect)

	foreach var of varlist audit overpaid underpaid{
		forval year = 2010/2015{
			gen `var'_by`year'     = 0
			replace `var'_by`year' = 1 if year_audit <= `year'
			label variable `var'_by`year' "audit by `year'"

			gen `var'_in`year'     = 0
			replace `var'_in`year' = 1 if year_audit == `year'
			label variable `var'_in`year' "audit in `year'"
		}	
	}

	gen LOS = thru_dt - from_dt

	destring drg_cd, replace

	gen     OrigDRG2 = drg_cd
	replace OrigDRG2 = OrigDRG if !missing(OrigDRG)
	label var OrigDRG2 "Original DRG (edited)" 

	gen age_disch = (thru_dt - bene_dob)/365
	gen age_round = round(age_disch, 1)

	egen mon_fyear = group(mon_disch fyear)

	keep audit* overpaid* underpaid* Demand* p_demand* Orig* pmt* LOS mon_disch drg_cd OrigDRG OrigDRG2 src_adms age_disch age_round race sex prstate drg_vers file_year clm_id provider bene_dob bene_id g_bene_zip5 death_dt

	egen provider_tag = tag(provider)

	// Label outcome variables
	label variable audit 		     "audit rate"
	label variable overpaid 	     "overpaid rate"
	label variable underpaid 	     "underpaid rate"
	label variable DemandAmt 	     "demanded amt ($)"
	label variable DemandAmt_over    "overpayments demanded amt ($)"
	label variable DemandAmt_under   "underpayments restored amt ($)"
	label variable p_demandamt       "share claim demanded or restored"
	label variable p_demandamt_over  "share claim overpaid"
	label variable p_demandamt_under "share claim underpaid"


	forval y = 2008/2016{
		di "now appending year: `y'"
		local last_y = `y' - 1
		preserve
			// Define outcome variables and covariates
				use "${DataPath}/claims/ip100pct_fy`y'_full.dta", clear

				// Restrict to providers with sufficient # of discharges
				bys provider: gen tot_disch = _N
				keep if tot_disch >= `min_ndisch'

				// only include non-MA beneficiaries -- added in 5/9/2021
				keep if (mon_disch == 1 & hmoind01 == "0") | (mon_disch == 2 & hmoind02 == "0") | (mon_disch == 3 & hmoind03 == "0") | (mon_disch == 4 & hmoind04 == "0") | (mon_disch == 5 & hmoind05 == "0") | ///
						(mon_disch == 6 & hmoind06 == "0") | (mon_disch == 7 & hmoind07 == "0") | (mon_disch == 8 & hmoind08 == "0") | (mon_disch == 9 & hmoind09 == "0") | (mon_disch == 10 & hmoind10 == "0") | ///
						(mon_disch == 11 & hmoind11 == "0") | (mon_disch == 12 & hmoind12 == "0")
						
				// Construct variables
				gen fyear = `y'
				gen overpaid  = cond(OverUnder == "Overpayment", 1, 0)
				gen underpaid = cond(OverUnder == "Underpayment", 1, 0)
				gen OrigPmtAmt = pmt_amt
				replace OrigPmtAmt = Orig_ClaimAmt if !missing(Orig_ClaimAmt)

				replace DemandAmt = 0 if missing(DemandAmt)

				gen DemandAmt_over = DemandAmt if overpaid == 1
				replace DemandAmt_over = 0 if missing(DemandAmt_over)

				gen DemandAmt_under = DemandAmt if underpaid == 1
				replace DemandAmt_under = 0 if missing(DemandAmt_under)

				gen p_demandamt       = DemandAmt / OrigPmtAmt
				gen p_demandamt_over  = DemandAmt_over / OrigPmtAmt
				gen p_demandamt_under = DemandAmt_under / OrigPmtAmt

				gen year_audit = year(Date_InitSelect)

				foreach var of varlist audit overpaid underpaid{
					forval year = 2010/2015{
						gen `var'_by`year'     = 0
						replace `var'_by`year' = 1 if year_audit <= `year'
						label variable `var'_by`year' "audit by `year'"

						gen `var'_in`year'     = 0
						replace `var'_in`year' = 1 if year_audit == `year'
						label variable `var'_in`year' "audit in `year'"
					}	
				}

				gen LOS = thru_dt - from_dt

				destring drg_cd, replace

				gen     OrigDRG2 = drg_cd
				replace OrigDRG2 = OrigDRG if !missing(OrigDRG)
				label var OrigDRG2 "Original DRG (edited)" 

				gen age_disch = (thru_dt - bene_dob)/365
				gen age_round = round(age_disch, 1)

				egen mon_fyear = group(mon_disch fyear)

				keep audit* overpaid* underpaid* Demand* p_demand* Orig* pmt* LOS mon_disch drg_cd OrigDRG OrigDRG2 src_adms age_disch age_round race sex prstate drg_vers file_year clm_id provider bene_dob bene_id g_bene_zip5 death_dt

				egen provider_tag = tag(provider)

			// Label outcome variables
				label variable audit 		     "audit rate"
				label variable overpaid 	     "overpaid rate"
				label variable underpaid 	     "underpaid rate"
				label variable DemandAmt 	     "demanded amt ($)"
				label variable DemandAmt_over    "overpayments demanded amt ($)"
				label variable DemandAmt_under   "underpayments restored amt ($)"
				label variable p_demandamt       "share claim demanded or restored"
				label variable p_demandamt_over  "share claim overpaid"
				label variable p_demandamt_under "share claim underpaid"

				tempfile currentyear
				save `currentyear'
		restore
		append using `currentyear'
		save "${DataPath}/claims/ip100pct_fy0716_full.dta", replace 
	} 


	gen 	fyear = file_year
	replace fyear = fyear+1 if mon_disch >= 10

	tab file_year

	save "${DataPath}/claims/ip100pct_fy0716_full.dta", replace 

} // count_discharges



*--------------------------------------------------
* Create hospital-level audit rate summaries
*--------------------------------------------------
if `sum_hosp_level' == 1{
	// Create hospital-level audit rate summaries
	use fyear file_year mon_disch audit_in* audit_by* overpaid_in* overpaid_by* DemandAmt provider using "${DataPath}/claims/ip100pct_fy0716_full.dta", clear
	*gen 	fyear = file_year
	*replace fyear = fyear+1 if mon_disch >= 10
	
	forval fy = 2010/2015{
		gen 	auditrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
		replace auditrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & audit_in`fy' == 1

		gen 	auditrate_by`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
		replace auditrate_by`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & audit_by`fy' == 1

		gen 	overpaidrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
		replace overpaidrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & overpaid_in`fy' == 1

		gen 	overpaidrate_by`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
		replace overpaidrate_by`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & overpaid_by`fy' == 1

	}
	gcollapse (mean) auditrate_* overpaidrate_*  ///
			 (count) totaudclaims_2010 = auditrate_in2010 totaudclaims_2011 = auditrate_in2011 totaudclaims_2012 = auditrate_in2012 totaudclaims_2013 = auditrate_in2013 totaudclaims_2014 = auditrate_in2014 totaudclaims_2015 = auditrate_in2015 ///
			 (sum) totaudits_2010 = auditrate_in2010 totaudits_2011 = auditrate_in2011 totaudits_2012 = auditrate_in2012 totaudits_2013 = auditrate_in2013 totaudits_2014 = auditrate_in2014 totaudits_2015 = auditrate_in2015 , by(provider)
	save "${DataPath}/claims/auditrates_0716.dta", replace 

	// demand rates (percent of claims where a demand was made)
		use fyear file_year mon_disch audit_in* audit_by* overpaid_in* overpaid_by* DemandAmt provider using "${DataPath}/claims/ip100pct_fy0716_full.dta", clear
		*gen 	fyear = file_year
		*replace fyear = fyear+1 if mon_disch >= 10

		forval fy = 2010/2015{
			gen 	demandrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace demandrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & DemandAmt!= 0 & audit_in`fy' == 1

			gen 	demandoverrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace demandoverrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & DemandAmt > 0 & audit_in`fy' == 1


		}
		gcollapse (mean) demandrate_* demandoverrate_* ///
				 (count) totaudclaims_2010 = demandrate_in2010 totaudclaims_2011 = demandrate_in2011 totaudclaims_2012 = demandrate_in2012 totaudclaims_2013 = demandrate_in2013 totaudclaims_2014 = demandrate_in2014 totaudclaims_2015 = demandrate_in2015 ///
			     (sum) totdemands_2010 = demandrate_in2010 totdemands_2011 = demandrate_in2011 totdemands_2012 = demandrate_in2012 totdemands_2013 = demandrate_in2013 totdemands_2014 = demandrate_in2014 totdemands_2015 = demandrate_in2015 ///
			      , by(provider)
	    
		save "${DataPath}/claims/auditdemandrates_0716.dta", replace

		

	// Create hospital-level audit rate summaries, by year of claim
		use "${DataPath}/claims/ip100pct_fy0716_full.dta", clear
		*gen 	fyear = file_year
		*replace fyear = fyear+1 if mon_disch >= 10
		forval fy = 2010/2015{
			gen 	auditrate_min3_in`fy' = 0 if fyear == `fy'-3
			replace auditrate_min3_in`fy' = 1 if fyear == `fy'-3 & audit_in`fy' == 1

			gen 	auditrate_min2_in`fy' = 0 if fyear == `fy'-2
			replace auditrate_min2_in`fy' = 1 if fyear == `fy'-2 & audit_in`fy' == 1

			gen 	auditrate_min1_in`fy' = 0 if fyear == `fy'-1
			replace auditrate_min1_in`fy' = 1 if fyear == `fy'-1 & audit_in`fy' == 1

			gen 	auditrate_min0_in`fy' = 0 if fyear == `fy'
			replace auditrate_min0_in`fy' = 1 if fyear == `fy' & audit_in`fy' == 1

			gen 	overpaidrate_min3_in`fy' = 0 if fyear == `fy'-3
			replace overpaidrate_min3_in`fy' = 1 if fyear == `fy'-3 & overpaid_in`fy' == 1

			gen 	overpaidrate_min2_in`fy' = 0 if fyear == `fy'-2
			replace overpaidrate_min2_in`fy' = 1 if fyear == `fy'-2 & overpaid_in`fy' == 1

			gen 	overpaidrate_min1_in`fy' = 0 if fyear == `fy'-1
			replace overpaidrate_min1_in`fy' = 1 if fyear == `fy'-1 & overpaid_in`fy' == 1

			gen 	overpaidrate_min0_in`fy' = 0 if fyear == `fy'
			replace overpaidrate_min0_in`fy' = 1 if fyear == `fy' & overpaid_in`fy' == 1
			
		}

		gcollapse (mean) auditrate_* overpaidrate_* , by(provider fyear)
		save "${DataPath}/claims/auditrates3y_0716.dta", replace

	// Create hospital-level audit count summaries
		use "${DataPath}/claims/ip100pct_fy0716_full.dta", clear

		gcollapse (sum) audit_* overpaid_* , by(provider)
		rename audit_* n_audit_*
		rename overpaid_* n_overpaid_*

		save "${DataPath}/claims/n_audits_0716.dta", replace

	// Create hospital-level audit count summaries, by year of claim
		use "${DataPath}/claims/ip100pct_fy0716_full.dta", clear
		*gen 	fyear = file_year
		*replace fyear = fyear+1 if mon_disch >= 10
		forval fy = 2010/2015{
			gen 	audit_min3_in`fy' = 0 if fyear == `fy'-3
			replace audit_min3_in`fy' = 1 if fyear == `fy'-3 & audit_in`fy' == 1

			gen 	audit_min2_in`fy' = 0 if fyear == `fy'-2
			replace audit_min2_in`fy' = 1 if fyear == `fy'-2 & audit_in`fy' == 1

			gen 	audit_min1_in`fy' = 0 if fyear == `fy'-1
			replace audit_min1_in`fy' = 1 if fyear == `fy'-1 & audit_in`fy' == 1

			gen 	audit_min0_in`fy' = 0 if fyear == `fy'
			replace audit_min0_in`fy' = 1 if fyear == `fy' & audit_in`fy' == 1

			gen 	overpaid_min3_in`fy' = 0 if fyear == `fy'-3
			replace overpaid_min3_in`fy' = 1 if fyear == `fy'-3 & overpaid_in`fy' == 1

			gen 	overpaid_min2_in`fy' = 0 if fyear == `fy'-2
			replace overpaid_min2_in`fy' = 1 if fyear == `fy'-2 & overpaid_in`fy' == 1

			gen 	overpaid_min1_in`fy' = 0 if fyear == `fy'-1
			replace overpaid_min1_in`fy' = 1 if fyear == `fy'-1 & overpaid_in`fy' == 1

			gen 	overpaid_min0_in`fy' = 0 if fyear == `fy'
			replace overpaid_min0_in`fy' = 1 if fyear == `fy' & overpaid_in`fy' == 1
			
		}
		gcollapse (sum) audit_* overpaid_* , by(provider fyear)
		rename audit_* n_audit_*
		rename overpaid_* n_overpaid_*
		save "${DataPath}/claims/n_audits3y_0716.dta", replace 
} // sum_hosp_level

*--------------------------------------------------
* Create within-hospital-level audit rate summaries
*--------------------------------------------------
if `sum_within_hosp' == 1{
		// Create hospital-level audit rate summaries by MDC, LOS, patient age, number of CC's
		use fyear audit_in* audit_by* overpaid_in* overpaid_by* ///
		    LOS drg_cd provider mon_disch file_year DemandAmt age_disch using "${DataPath}/claims/ip100pct_fy0716_full.dta", clear

		rename drg_cd drg

	

		merge m:1 drg using "${DataPath}/base-msdrg-v26.dta", keep(1 3) nogen //  DRG to base DRG mapping available in replication packet

		*gen LOS = thru_dt - from_dt

		capture gen LOSbin = LOS
		replace LOSbin = 2150 if LOS >= 21 & LOS <= 50
		replace LOSbin = 51100 if LOS >= 51 & LOS <= 100
		replace LOSbin = 101 if LOS >= 101

		// age bin
		egen agebin = cut(age_disch), at(45, 55,65, 75, 85, 95, 105)

	/* 	// n cc's bin
		gen n_cc_bin = n_cc
		replace n_cc_bin = 5 if n_cc >= 5 */

		forval fy = 2010/2015{
			gen 	auditrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace auditrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & audit_in`fy' == 1

			gen 	auditrate_by`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace auditrate_by`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & audit_by`fy' == 1

			gen 	overpaidrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace overpaidrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & overpaid_in`fy' == 1

			gen 	overpaidrate_by`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace overpaidrate_by`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & overpaid_by`fy' == 1
		}

		forval fy = 2010/2015{
			gen 	demandrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace demandrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & DemandAmt!= 0 & audit_in`fy' == 1

			gen 	demandoverrate_in`fy' = 0 if fyear >= `fy'-3 & fyear <= `fy'
			replace demandoverrate_in`fy' = 1 if fyear >= `fy'-3 & fyear <= `fy' & DemandAmt > 0 & audit_in`fy' == 1


		}
		
		//los
		preserve
			gcollapse (mean) auditrate_* overpaidrate_* demandrate_* demandoverrate_* ///
			(count) totaudclaims_2010 = auditrate_in2010 totaudclaims_2011 = auditrate_in2011 totaudclaims_2012 = auditrate_in2012 totaudclaims_2013 = auditrate_in2013 totaudclaims_2014 = auditrate_in2014 totaudclaims_2015 = auditrate_in2015 ///
			(sum) totaudits_2010 = auditrate_in2010 totaudits_2011 = auditrate_in2011 totaudits_2012 = auditrate_in2012 totaudits_2013 = auditrate_in2013 totaudits_2014 = auditrate_in2014 totaudits_2015 = auditrate_in2015 ///
			totdemands_2010 = demandrate_in2010 totdemands_2011 = demandrate_in2011 totdemands_2012 = demandrate_in2012 totdemands_2013 = demandrate_in2013 totdemands_2014 = demandrate_in2014 totdemands_2015 = demandrate_in2015, ///
			by(provider LOSbin)
			save "${DataPath}/claims/auditrates_byLOSbin_0716.dta", replace
		restore 


		// top 20 error rate (based on 2010 CERT report of DRGs with highest error rates - see description in manuscript)
		preserve
			destring drg, replace
			gen 	drg_toperror = 1 if drg == 469 | drg == 470
			replace drg_toperror = 2 if drg >= 242 & drg <= 244
			replace drg_toperror = 3 if drg == 246 | drg == 247
			replace drg_toperror = 4 if drg == 871 | drg == 872
			replace drg_toperror = 5 if drg == 313
			replace drg_toperror = 6 if drg >= 377 & drg <= 379
			replace drg_toperror = 7 if drg >= 329 & drg <= 331
			replace drg_toperror = 8 if drg >= 177 & drg <= 179
			replace drg_toperror = 9 if drg == 391 | drg == 392
			replace drg_toperror = 10 if drg == 689 | drg == 690
			replace drg_toperror = 11 if drg == 640 | drg == 641
			replace drg_toperror = 12 if drg >= 682 & drg <= 684
			replace drg_toperror = 13 if drg == 312
			replace drg_toperror = 14 if drg >= 291 & drg <= 293
			replace drg_toperror = 15 if drg >= 308 & drg <= 310
			replace drg_toperror = 16 if drg >= 193 & drg <= 195
			replace drg_toperror = 17 if drg >= 280 & drg <= 282
			replace drg_toperror = 18 if drg >= 190 & drg <= 192
			replace drg_toperror = 19 if drg >= 480 & drg <= 482
			replace drg_toperror = 20 if drg >= 64 & drg <= 66
			replace drg_toperror = 100 if missing(drg_toperror)

			gcollapse (mean) auditrate_* overpaidrate_* demandrate_* demandoverrate_* ///
			  (count) totaudclaims_2010 = auditrate_in2010 totaudclaims_2011 = auditrate_in2011 totaudclaims_2012 = auditrate_in2012 totaudclaims_2013 = auditrate_in2013 totaudclaims_2014 = auditrate_in2014 totaudclaims_2015 = auditrate_in2015 ///
		 	  (sum) totaudits_2010 = auditrate_in2010 totaudits_2011 = auditrate_in2011 totaudits_2012 = auditrate_in2012 totaudits_2013 = auditrate_in2013 totaudits_2014 = auditrate_in2014 totaudits_2015 = auditrate_in2015 ///
		 	  totdemands_2010 = demandrate_in2010 totdemands_2011 = demandrate_in2011 totdemands_2012 = demandrate_in2012 totdemands_2013 = demandrate_in2013 totdemands_2014 = demandrate_in2014 totdemands_2015 = demandrate_in2015 ///
		 	  ,by(drg_toperror)
		 	save "${DataPath}/claims/auditrates_by_top20msdrg_0716.dta", replace

		restore

		// all DRGs
		preserve
			gcollapse (mean) auditrate_* overpaidrate_* demandrate_* demandoverrate_* ///
				  (count) totaudclaims_2010 = auditrate_in2010 totaudclaims_2011 = auditrate_in2011 totaudclaims_2012 = auditrate_in2012 totaudclaims_2013 = auditrate_in2013 totaudclaims_2014 = auditrate_in2014 totaudclaims_2015 = auditrate_in2015 ///
			 	  (sum) totaudits_2010 = auditrate_in2010 totaudits_2011 = auditrate_in2011 totaudits_2012 = auditrate_in2012 totaudits_2013 = auditrate_in2013 totaudits_2014 = auditrate_in2014 totaudits_2015 = auditrate_in2015 ///
			 	  totdemands_2010 = demandrate_in2010 totdemands_2011 = demandrate_in2011 totdemands_2012 = demandrate_in2012 totdemands_2013 = demandrate_in2013 totdemands_2014 = demandrate_in2014 totdemands_2015 = demandrate_in2015 ///
			 	  ,by(drg)
			save "${DataPath}/claims/auditrates_by_alldrg_0716.dta", replace
		restore

		// base DRGs
		preserve
			replace basedrg = 1000 if missing(basedrg)
			gcollapse (mean) auditrate_* overpaidrate_* demandrate_* demandoverrate_* ///
				  (count) totaudclaims_2010 = auditrate_in2010 totaudclaims_2011 = auditrate_in2011 totaudclaims_2012 = auditrate_in2012 totaudclaims_2013 = auditrate_in2013 totaudclaims_2014 = auditrate_in2014 totaudclaims_2015 = auditrate_in2015 ///
			 	  (sum) totaudits_2010 = auditrate_in2010 totaudits_2011 = auditrate_in2011 totaudits_2012 = auditrate_in2012 totaudits_2013 = auditrate_in2013 totaudits_2014 = auditrate_in2014 totaudits_2015 = auditrate_in2015 ///
			 	  totdemands_2010 = demandrate_in2010 totdemands_2011 = demandrate_in2011 totdemands_2012 = demandrate_in2012 totdemands_2013 = demandrate_in2013 totdemands_2014 = demandrate_in2014 totdemands_2015 = demandrate_in2015 ///
			 	  ,by(basedrg)
			save "${DataPath}/claims/auditrates_by_basedrg_0716.dta", replace
		restore


} //sum_within_hosp


